General view of the galaxy data

### Import the galaxy data set
# read.csv is used even though the extension is .txt — the file is assumed
# to be comma-separated with a header row.
galaxy <- read.csv("galaxy.txt")
velocity <- galaxy$velocity
east.west <- galaxy$east.west
north.south <- galaxy$north.south
# Rename columns to the generic (y, x1, x2) used by every model below.
galaxy <- data.frame(y=galaxy$velocity, x1=galaxy$east.west, x2=galaxy$north.south)
n <- length(velocity)  # sample size; read as a global by several helpers below
p <- 2                 # number of covariates
y <- galaxy$y
# Interactive 3-D scatter of measured radial velocity over the sky coordinates.
# NOTE(review): `font` is not defined anywhere in this file — presumably set
# in an earlier chunk; confirm before running this chunk standalone.
galaxy.3d <- plot_ly(x = ~x1, y = ~x2, z = ~y,  data=galaxy,
                     size = 20, type = "scatter3d", mode = "markers",
                     color = ~y)%>%layout(scene = list(xaxis = list(title = "West-East"),
                      yaxis = list(title = "South-North"),
                      zaxis = list(title = "Velocity")),
                      title = '', font=font) %>% colorbar(title = "Measured Radial Velocity")
galaxy.3d
### Marginal variation of measured radial velocity with east-west and north-south coordinate
# with(galaxy, plot(north.south, velocity,  cex = 0.5))
# with(galaxy, plot(east.west, velocity, cex = 0.5))

Ordinary least squares estimators

### Ordinary Least Squares
multilinear <- lm(y ~ x1 + x2, data = galaxy)

## Plot the fitted surface
multilinear.coef <- coef(multilinear)
# 100-point grids over the observed east-west (x1) and north-south (x2) ranges.
ew <- seq(min(galaxy$x1), max(galaxy$x1), length.out = 100)
ns <- seq(min(galaxy$x2), max(galaxy$x2), length.out = 100)
# NOTE(review): inside outer(), a sweeps ew (x1) and b sweeps ns (x2), yet the
# x1 coefficient (coef[2]) multiplies b and the x2 coefficient (coef[3])
# multiplies a. The same swap recurs in the ridge and polynomial surfaces —
# confirm it is intentional for plotly's z orientation.
multilinear.fit.vel <- outer(ew,ns,function(a,b){
  multilinear.coef[1]+multilinear.coef[2]*b+multilinear.coef[3]*a})
OLS.surface <- galaxy.3d %>% add_surface(x=~ew, y=~ns, z=~multilinear.fit.vel, 
                                          colors = "pink", opacity=0.5, showscale=FALSE,
                                          type = "surface", mode = "markers", showlegend = F)%>%layout(scene = list(xaxis = list(title = "East-West"),
                      yaxis = list(title = "North-South"),
                      zaxis = list(title = "Velocity")),
                      title = '') %>%hide_colorbar()
## Compute AIC
multilinear.aic <- function(model, data){
  # Per-observation AIC for a fitted linear model.
  # model: an lm object fitted on `data`
  # data: a data.frame containing the sample data (response in column y)
  # Returns -2 * mean log-likelihood + 2 * tr(H)/n, where the residual
  # variance uses the effective degrees of freedom d = tr(H).
  y <- data$y
  n <- length(y)  # was read from the global environment; compute locally
  X <- model.matrix(model)
  H <- X%*%solve(t(X)%*%X)%*%t(X)  # hat (projection) matrix
  d <- sum(diag(H))                # effective degrees of freedom = ncol(X)
  yhat <- H%*%y                    # fitted values (reused below, was recomputed)
  sigmahat2 <- sum((y-yhat)^2)/(n-d)
  -2*mean(dnorm(y, yhat, sqrt(sigmahat2), log=TRUE)) + 2*d/n
}
multilinear.aic(multilinear, galaxy)  # per-observation AIC of the OLS fit
## [1] 9.768828

Ordinary Ridge Regression - Penalized linear regression

\(\lambda\) is chosen to minimize the AIC

### Ridge Regression model
## Compute AIC and the lambda that minimizes AIC
ridge.aic <- function(data, lambda){
  # Per-observation AIC for ridge regression y ~ 1 + x1 + x2.
  # data: data.frame with columns y, x1, x2
  # lambda: scalar ridge penalty (>= 0)
  # NOTE(review): diag(lambda, p) also shrinks the intercept; kept as-is to
  # preserve the original model — confirm this is intended.
  y <- data$y
  n <- length(y)  # was read from the global environment; compute locally
  X <- cbind(1, data$x1, data$x2)
  p <- ncol(X)
  XtX <- crossprod(X)
  H <- X %*% solve(XtX + diag(lambda, p)) %*% t(X)  # ridge smoother matrix
  d <- sum(diag(H))   # effective degrees of freedom
  yhat <- H %*% y
  sigmahat2 <- sum((y - yhat)^2)/(n - d)
  -2*mean(dnorm(y, yhat, sqrt(sigmahat2), log = TRUE)) + 2*d/n
}

## AIC vs. lambda
lambda.vals_ <- exp(seq(-5, 5, length.out = 100))
aic.vals_ <- sapply(lambda.vals_, function(k){ridge.aic(galaxy, k)})
plot(lambda.vals_, aic.vals_, main = expression(Ridge~Penalty:~AIC~vs.~lambda),
     xlab = expression(lambda), ylab = "AIC", type = "l", col = "red")
# Run the 1-D optimization once and reuse both the minimizer and the
# objective (previously optimize() was invoked twice, doubling the work;
# optimize() only uses the range of its interval, so pass that directly).
ridge.opt_ <- optimize(f = function(k){ridge.aic(galaxy, k)},
                       interval = range(lambda.vals_))
ridge.lambda.opt <- ridge.opt_$minimum
ridge.aic.opt <- ridge.opt_$objective
lines(rep(ridge.lambda.opt, 2), c(0, 20), lty = 2, col = "black")

ridge.lambda.opt
## [1] 0.006817714
ridge.aic.opt
## [1] 9.768829
## Fit ridge regression model
galaxy.ridge <- function(lambda, data = galaxy){
  # Fit ridge regression y ~ 1 + x1 + x2 with penalty lambda.
  # lambda: scalar ridge penalty
  # data: data.frame with columns y, x1, x2 (defaults to the galaxy data,
  #       preserving the original single-argument call)
  # Returns list(coefficients, sigmahat).
  y <- data$y
  n <- length(y)  # was read from the global environment; compute locally
  X <- cbind(1, data$x1, data$x2)
  XtX <- crossprod(X)
  p <- ncol(X)
  # Invert the penalized normal-equation matrix once and reuse it for both
  # the coefficients and the smoother matrix (previously solve() ran twice).
  A <- solve(XtX + diag(lambda, p))
  betahat <- A %*% t(X) %*% y
  H <- X %*% A %*% t(X)
  d <- sum(diag(H))   # effective degrees of freedom
  yhat <- H %*% y
  sigmahat2 <- sum((y - yhat)^2)/(n - d)
  list(coefficients = betahat, sigmahat = sqrt(sigmahat2))
}
ridge.coef <- galaxy.ridge(ridge.lambda.opt)$coefficients
# Plane fitted by ridge regression, evaluated over the (ew, ns) grid.
# NOTE(review): as in the OLS surface, a sweeps ew (x1) while the x1
# coefficient multiplies b (and vice versa) — confirm against the plot.
ridge.fit.vel <- outer(ew,ns,function(a,b){
  ridge.coef[1]+ridge.coef[2]*b+ridge.coef[3]*a})
ridge.surface <- galaxy.3d %>% add_surface(x=~ew, y=~ns, z=~ridge.fit.vel, colors = "magenta",opacity=0.5,
                          type = "surface", mode = "markers", showscale=FALSE, showlegend = F)%>%layout(scene = list(xaxis = list(title = "East-West"),
                      yaxis = list(title = "North-South"),
                      zaxis = list(title = "")),
                      title = "") %>% hide_colorbar()

Polynomial Regression - Cubic polynomials

### Fitting polynomial
# Additive cubic polynomial in x1 and x2 (no interaction terms).
poly.formula <- paste("y~x1+I(x1^2)+I(x1^3)",
                                 "+x2+I(x2^2)+I(x2^3)",
                                 sep="")
poly.formula <- as.formula(poly.formula)
poly <- lm(poly.formula, data=galaxy)
# Coefficient order: intercept, x1, x1^2, x1^3, x2, x2^2, x2^3.
poly.coef <- coef(poly)
# NOTE(review): a sweeps ew (x1) and b sweeps ns (x2), but the x1
# coefficients (2:4) multiply b and the x2 coefficients (5:7) multiply a —
# same swap as the other surfaces; confirm against the rendered plot.
poly.fit.vel <- outer(ew,ns,function(a,b){
  poly.coef[1]+poly.coef[2]*b+poly.coef[3]*(b^2)+poly.coef[4]*(b^3)+poly.coef[5]*a+poly.coef[6]*(a^2)+poly.coef[7]*(a^3)})
poly.surface <- galaxy.3d %>% add_surface(x=~ew, y=~ns, z=~poly.fit.vel, 
                          type = "surface", mode = "markers", showlegend = F, colorscale = "Oranges",
                          opacity=0.9, showscale=F)%>%layout(scene = list(xaxis = list(title = "East-West"),
                      yaxis = list(title = "North-South"),
                      zaxis = list(title = "")),
                      title = "")%>%hide_colorbar()

## Compute AIC
poly.aic <- function(model, data){
  # Per-observation AIC for a fitted (polynomial) linear model; same
  # convention as multilinear.aic.
  # model: an lm object fitted on `data`
  # data: a data.frame containing the sample data (response in column y)
  y <- data$y
  n <- length(y)  # was read from the global environment; compute locally
  X <- model.matrix(model)
  H <- X%*%solve(t(X)%*%X)%*%t(X)  # hat (projection) matrix
  d <- sum(diag(H))                # effective degrees of freedom = ncol(X)
  yhat <- H%*%y                    # fitted values (reused below, was recomputed)
  sigmahat2 <- sum((y-yhat)^2)/(n-d)
  -2*mean(dnorm(y, yhat, sqrt(sigmahat2), log=TRUE)) + 2*d/n
}
poly.aic(poly, galaxy)  # per-observation AIC of the cubic polynomial fit
## [1] 9.011007

Additive Model - Smoothing Splines

\(\lambda\) is chosen to minimize the AIC

### Fit additive model
## Choose lambda that minimizes AIC for the galaxy datasets
galaxy.spline.AIC <- function(data, lambda) {
  # Per-observation AIC of the additive smoothing-spline model
  # y - ybar = f1(x1) + f2(x2), with B-spline bases and a point constraint
  # at covariate value 0 (pc=0) absorbed into each basis.
  # data is a data.frame object containing the data
  # lambda is a scalar for the penalty lambda
  y <- data$y
  x1 <- data$x1
  x2 <- data$x2
  n <- length(y)
  ybar <- mean(y)
  # Point constraint is chosen to be the mean velocity
  add.smooth.1 <- smoothCon(s(x1, bs="bs", pc=0), data=data, absorb.cons = T)
  add.smooth.2 <- smoothCon(s(x2, bs="bs", pc=0), data=data, absorb.cons= T)
  smooth.X1 <- add.smooth.1[[1]]$X
  smooth.X2 <- add.smooth.2[[1]]$X
  smooth.S1 <- add.smooth.1[[1]]$S[[1]]
  smooth.S2 <- add.smooth.2[[1]]$S[[1]]
  # Block-diagonal penalty: each smooth is penalized separately.
  smooth.S <- Matrix::bdiag(smooth.S1,smooth.S2)
  smooth.X <- cbind(smooth.X1, smooth.X2)
  smooth.XtX <- crossprod(smooth.X)
  # Penalized smoother matrix; its trace is the effective degrees of freedom.
  smooth.Hat <- smooth.X %*% solve(smooth.XtX + lambda*smooth.S) %*% t(smooth.X)
  # Fit on the centred response, then add the centring constant back.
  yhat <- smooth.Hat%*%(y-ybar) + ybar
  trHat <- sum(diag(smooth.Hat))
  sigmahat2 <- sum((y - yhat)^2)/(n-trHat)
  # Same per-observation AIC convention as the *.aic helpers above.
  aic <- -2*mean(dnorm(y,yhat,sqrt(sigmahat2),log=TRUE)) + 2*mean(diag(smooth.Hat))
  return(aic) 
}

## AIC vs. lambda
lambda.vals <- exp(seq(-5, 5, length.out = 100))
aic.vals <- sapply(lambda.vals, function(k){galaxy.spline.AIC(galaxy, k)})
plot(lambda.vals, aic.vals, main = expression(Additive~Model:~AIC~vs.~lambda),
     xlab = expression(lambda), ylab = "AIC", type = "l", col = "red")
# Optimize once and reuse both components instead of re-evaluating the
# (expensive) spline AIC at the minimizer; optimize() only uses the range
# of its interval, so pass that directly.
spline.opt_ <- optimize(function(k){galaxy.spline.AIC(galaxy, k)},
                        interval = range(lambda.vals))
lambda.opt <- spline.opt_$minimum
aic.opt <- spline.opt_$objective # AIC
lines(rep(lambda.opt, 2), c(8, 10), lty = 2, col = "black")

lambda.opt
## [1] 0.8703812
aic.opt
## [1] 8.647406
## Fit model with lambda.opt and plot surface
galaxy.spline.pred <- function(lambda, newdata){
  # Fit the additive smoothing-spline model to the (global) galaxy data
  # and evaluate it at new coordinates.
  # lambda is a scalar for the penalty lambda
  # newdata a data.frame containing the new covariates to do predictions
  # Returns: predictions at newdata, the penalized coefficients betahat,
  # the in-sample fitted values yhat (NOTE: independent of newdata), and
  # the residual-variance estimate sigmahat2.
  y <- galaxy$y
  n <- length(y)
  ybar <- mean(galaxy$y)
  
  # Point constraint is chosen to be the sample mean of velocities
  # (response centred at ybar; each smooth uses pc=0).
  add.smooth.1 <- smoothCon(s(x1, bs="bs", pc=0),data=galaxy,absorb.cons=T)
  add.smooth.2 <- smoothCon(s(x2, bs="bs", pc=0),data=galaxy,absorb.cons=T)
  smooth.X1 <- add.smooth.1[[1]]$X
  smooth.X2 <- add.smooth.2[[1]]$X
  smooth.S1 <- add.smooth.1[[1]]$S[[1]]
  smooth.S2 <- add.smooth.2[[1]]$S[[1]]
  # Block-diagonal penalty: the two smooths are penalized separately.
  smooth.S <- Matrix::bdiag(smooth.S1,smooth.S2)
  smooth.X <- cbind(smooth.X1, smooth.X2)
  smooth.XtX <- crossprod(smooth.X)
  # Penalized least squares on the centred response.
  smooth.betahat <- solve(smooth.XtX + lambda*smooth.S)%*%t(smooth.X)%*%(y-ybar)
  smooth.Hat <- smooth.X %*% solve(smooth.XtX + lambda*smooth.S) %*% t(smooth.X)
  yhat <- smooth.Hat%*%(y-ybar) + ybar
  trHat <- sum(diag(smooth.Hat))
  sigmahat2 <- sum((y - yhat)^2)/(n-trHat)
  
  # predictions: evaluate each smooth's basis at the new covariates,
  # then add the centring constant ybar back
  new.x1 <- PredictMat(add.smooth.1[[1]],data.frame(x1 = newdata$x1))
  new.x2 <- PredictMat(add.smooth.2[[1]],data.frame(x2 = newdata$x2))
  new.X <- cbind(new.x1,new.x2)
  predictions <- new.X%*%smooth.betahat + ybar
  return(list(predictions=predictions,
              betahat=smooth.betahat,
              yhat=yhat,
              sigmahat2=sigmahat2))
}
# Prediction grid: expand.grid varies x1 fastest, so consecutive elements of
# the prediction vector sweep ew at a fixed ns value.
new.coord <- expand.grid(x1=ew,x2=ns)
add.fit.vel <- galaxy.spline.pred(lambda.opt, new.coord)
# byrow=T therefore places one ns value per row with ew along the columns —
# presumably to match plotly's surface z orientation; TODO confirm.
add.fit.vel.pred <- matrix(add.fit.vel$predictions, ncol = 100, nrow = 100, byrow=T)
additive.surface <- galaxy.3d %>% add_surface(x=~ew, y=~ns, z=~add.fit.vel.pred, 
                          type = "surface", mode = "markers", showlegend = F, colorscale = 'Blues',
                          opacity=0.9, showscale=F)%>%layout(scene = list(xaxis = list(title = "East-West"),
                      yaxis = list(title = "North-South"),
                      zaxis = list(title = "")),
                      title = "")%>%hide_colorbar()

A contrast with reference point at the origin \((0,0)\):

### Implement a contrast with reference point at the origin for 
###  the additive model
## Fit model with lambda.opt and plot surface
galaxy.spline.contrast <- function(lambda, newdata, ref){
  # Additive smoothing-spline fit on the (global) galaxy data, returning
  # the contrasts f(newdata) - f(ref) with pointwise standard errors.
  # lambda is a scalar for the penalty lambda
  # newdata a data.frame containing the new covariates for predictions
  # ref is a data.frame containing the reference point
  y <- galaxy$y
  n <- length(y)
  ybar <- mean(galaxy$y)
  
  # Point constraint is chosen to be the sample mean of velocities
  add.smooth.1 <- smoothCon(s(x1, bs="bs", pc=0),data=galaxy,absorb.cons=T)
  add.smooth.2 <- smoothCon(s(x2, bs="bs", pc=0),data=galaxy,absorb.cons=T)
  smooth.X1 <- add.smooth.1[[1]]$X
  smooth.X2 <- add.smooth.2[[1]]$X
  smooth.S1 <- add.smooth.1[[1]]$S[[1]]
  smooth.S2 <- add.smooth.2[[1]]$S[[1]]
  smooth.S <- Matrix::bdiag(smooth.S1,smooth.S2)
  smooth.X <- cbind(smooth.X1, smooth.X2)
  smooth.XtX <- crossprod(smooth.X)
  # Invert the penalized normal-equation matrix once and reuse it
  # (previously solve() ran three times on the same matrix).
  smooth.Ainv <- solve(smooth.XtX + lambda*smooth.S)
  smooth.betahat <- smooth.Ainv%*%t(smooth.X)%*%(y-ybar)
  smooth.Hat <- smooth.X %*% smooth.Ainv %*% t(smooth.X)
  yhat <- smooth.Hat%*%(y-ybar) + ybar

  # predictions
  new.x1 <- PredictMat(add.smooth.1[[1]],data.frame(x1 = newdata$x1))
  new.x2 <- PredictMat(add.smooth.2[[1]],data.frame(x2 = newdata$x2))
  ref.x1 <- PredictMat(add.smooth.1[[1]],data.frame(x1 = ref$x1))
  ref.x2 <- PredictMat(add.smooth.2[[1]],data.frame(x2 = ref$x2))
  new.X <- cbind(new.x1,new.x2)
  ref.X <- cbind(ref.x1, ref.x2)
  predictions <- new.X%*%smooth.betahat + ybar
  pred.ref <- ref.X%*%smooth.betahat + ybar
  contrasts <- predictions - rep(pred.ref, length(predictions))
  
  # standard errors of the contrasts
  trHat <- sum(diag(smooth.Hat))   # smooth.Hat was recomputed here before
  sigmahat2 <- sum((y - yhat)^2)/(n-trHat)
  # Sandwich covariance of the penalized coefficients.
  smooth.varbeta <- sigmahat2*(smooth.Ainv%*%smooth.XtX%*%smooth.Ainv)
  contrast.mat <- sweep(new.X,2,ref.X,'-')
  # Only the diagonal of C V C' is needed: compute it directly instead of
  # materializing the full n_pred x n_pred covariance matrix and taking an
  # elementwise sqrt of all of it.
  var.contrast.diag <- Matrix::rowSums((contrast.mat %*% smooth.varbeta) * contrast.mat)
  contrasts.se <- sqrt(as.numeric(var.contrast.diag))
  return(list(contrasts=contrasts,
              betahat=smooth.betahat,
              contrasts.se = contrasts.se))
}
origin <- data.frame(x1=0,x2=0)
add.cont.vel <- galaxy.spline.contrast(lambda.opt, new.coord, origin)
# Reshape the contrast vector and its standard errors onto the 100x100 grid
# (same byrow layout as the additive surface above).
add.cont.vel.grid <- matrix(add.cont.vel$contrasts, ncol = 100, nrow = 100, byrow=T)
add.cont.vel.se <- matrix(add.cont.vel$contrasts.se, ncol = 100, nrow = 100, byrow=T)
# NOTE(review): "tickness" is presumably meant to be colorbar's "thickness"
# argument — confirm; as written it is likely ignored.
contrast.surface <- plot_ly(x=~ew, y=~ns, z=~add.cont.vel.grid, type = "surface", 
                            mode = "markers", showlegend = F, color=~add.cont.vel.grid,
                            opacity=0.9, showscale=T)%>%colorbar(title = "",len=0.75, tickness=20)
contrast.surface%>%layout(scene = list(xaxis = list(title = "West-East"),
                      yaxis = list(title = "South-North"),
                      zaxis = list(title = "")),
                      title = "")
contrast.surface %>% hide_colorbar()
# Heat map / contour view of the (negated) contrast surface.
image(ew, ns, -add.cont.vel.grid,
      col = viridis(100),
      xlab = "East-West",
      ylab = "South-North",
      main = "Heat Map of Contrast")
contour(x = ew, y = ns, z = -add.cont.vel.grid, nlevels = 50, xlab = "East-West",
ylab = "South-North", main = "", add = TRUE)
# Annotate the direction of steepest change across the grid.
lines(c(-25, 25), c(45, -45), lty = 2, lwd = 2, col = "red")
# NOTE(review): text() has no "fonts" argument ("family" selects the font);
# confirm — as written this argument is likely ignored.
text(8.5, -19, 
     labels = "Steepest Change", col = "red", srt=-30, fonts="Arial")

## Confidence Surfaces
# Z.q <- qnorm(0.975)
# ybar <- mean(galaxy$y)
# galaxy.3d %>%add_surface(x=~ew, y=~ns, z=~ybar+add.cont.vel.grid+Z.q*add.cont.vel.se, 
#        type = "surface", mode = "markers", showlegend = F, colors = 'purple', showlegend = F,
#        opacity=0.3, showscale=F)%>% add_surface(x=~ew, y=~ns, z=~ybar+add.cont.vel.grid-Z.q*add.cont.vel.se, showlegend = F,
#                                  colors = 'purple', opacity=0.5) %>%
#   colorbar(title = "Measured Velocity")%>%layout(scene = list(xaxis = list(title = "East-West"),
#                                        yaxis = list(title = "North-South"),
#                                        zaxis = list(title = "")),
#                           title = 'Change in Measured Velocity Relative to Origin')%>%colorbar(title = "Change in Measured Velocity")%>% layout(scene = list(aspectratio = list(x=2,y=2,z=2),
#                                                                                               xaxis = list(title = "East-West", range = range(ew)), yaxis = list(title = "North-South", range = range(ns)), zaxis = list(title = "")))%>%hide_colorbar()
### Residuals plots for Additive Model
# load necessary packages
library(ggplot2)  
result <- galaxy.spline.pred(lambda.opt, galaxy)
yhat <- result$predictions
sigmahat2 <- result$sigmahat2
# Studentized residuals of the additive fit.
residuals <- (galaxy$y - yhat)/sqrt(sigmahat2)

# Residuals against the east-west coordinate.
# (The original labels said "Fitted Values", but the x aesthetic is x1.)
ggplot(data.frame(x1 = galaxy$x1, x2 = galaxy$x2, residuals = residuals),
       aes(x = x1, y = residuals)) + 
  ggtitle("Studentized Residuals vs East-West Coordinate") +
  geom_point() +
  xlab("East-West Coordinate (x1)") +
  ylab("Studentized Residuals") +
  geom_hline(yintercept = c(-1.96, 0, 1.96), color = "red", lty = 2)  

qqnorm(residuals, main = "Residuals Normal Q-Q Plot")
qqline(residuals, col = "red") 

hist(residuals, breaks = 20, probability = TRUE, main = "Histogram of Residuals",
     xlab = "Studentized Residuals")
rr <- seq(-5, 5, length.out = 1000)
dd <- dnorm(rr)
lines(rr, dd, col = "red", lty = 1)
# Legend line type now matches the curve drawn above (lty = 1, was 2).
legend("topleft", "N(0,1) PDF", lty = 1, col = "red")

Cross Validation

### Cross validation for additive model 
additive.cv <- function(train, test, lambda=lambda.opt){
  # Fit the additive smoothing-spline model on `train` and predict the
  # responses for `test`.
  # train and test are data.frame objects containing the train and test set
  # lambda: smoothing penalty (defaults to the globally chosen lambda.opt)
  y.train <- train$y
  y.test <- test$y
  ybar.train <- mean(train$y)
  
  # Point constraint chosen to be the mean of the train data
  add.smooth.1 <- smoothCon(s(x1, bs="bs", pc=0),data=train,absorb.cons=T)
  add.smooth.2 <- smoothCon(s(x2, bs="bs", pc=0),data=train,absorb.cons=T)
  smooth.X1 <- add.smooth.1[[1]]$X
  smooth.X2 <- add.smooth.2[[1]]$X
  smooth.S1 <- add.smooth.1[[1]]$S[[1]]
  smooth.S2 <- add.smooth.2[[1]]$S[[1]]
  # Block-diagonal penalty: the two smooths are penalized separately.
  smooth.S <- Matrix::bdiag(smooth.S1,smooth.S2)
  smooth.X <- cbind(smooth.X1, smooth.X2)
  smooth.XtX <- crossprod(smooth.X)
  # Penalized least squares on the centred training response.
  smooth.betahat <- solve(smooth.XtX + lambda*smooth.S)%*%t(smooth.X)%*%(y.train-ybar.train)

  # predictions: evaluate the training bases at the test covariates and
  # add the training mean back
  new.x1 <- PredictMat(add.smooth.1[[1]],data.frame(x1 = test$x1))
  new.x2 <- PredictMat(add.smooth.2[[1]],data.frame(x2 = test$x2))
  new.X <- cbind(new.x1,new.x2)
  predictions <- new.X%*%smooth.betahat + ybar.train
  return(predictions)
}

## Partition the data into k folds (shared by all CV runs below)
partition.Kfold <- function(k, data, seed=4446){
  # Randomly partition the rows of `data` into k folds.
  # k is the number of folds; k == nrow(data) gives leave-one-out
  # data is a data.frame object containing the data
  # seed is a number for the random seed
  #      (NOTE: set.seed() alters the caller's global RNG state)
  # Returns a list of k data.frames; if k does not divide n, the last
  # fold absorbs the leftover rows.
  set.seed(seed)
  n <- nrow(data)
  if (k == n){ # leave-one-out: one row per fold
    return(lapply(seq_len(n), function(i) data[i, ]))
  }
  remaining <- seq_len(n)
  size <- floor(n/k)
  partitions <- vector("list", k)  # preallocate instead of growing
  for (j in seq_len(k)){
    if (j == k){ # last fold takes whatever rows are left
      partitions[[j]] <- data[remaining, ]
    } else {
      selects <- sample(remaining, size, replace = FALSE)
      # match() replaces the per-element sapply/which scan and avoids
      # shadowing the fold count k inside the anonymous function
      remaining <- remaining[-match(selects, remaining)]
      partitions[[j]] <- data[selects, ]
    }
  }
  partitions
}

## cross validation
cross.validate.project <- function(data.par, mod='additive') {
  # Estimate mean squared prediction error by cross validation.
  # data.par is an object returned by partition.Kfold (list of folds)
  # mod is a type of models used in this project: "lm","poly","ridge","additive"
  # NOTE: "ridge" reads the global ridge.lambda.opt; "additive" calls
  # additive.cv (which defaults to the global lambda.opt).
  stopifnot(mod%in%c('lm','poly','ridge','additive'))
  n <- sum(unlist(lapply(data.par, function(fold){nrow(fold)})))
  k <- length(data.par)
  # Loop-invariant: build the polynomial formula once, not once per fold.
  poly.formula <- as.formula(paste("y~x1+I(x1^2)+I(x1^3)",
                                   "+x2+I(x2^2)+I(x2^3)", sep=""))
  loss <- 0
  for (i in seq_len(k)){
    test <- data.par[[i]]
    train <- Reduce(rbind, data.par[-i])
    y.test <- test$y
    if (mod=="lm"){          # ordinary linear regression
      model <- lm(y~x1+x2, data=train)
      yhat.test <- predict(model, test)
    } else if (mod=="poly"){ # cubic polynomial regression
      model <- lm(poly.formula, data=train)
      yhat.test <- predict(model, test)
    } else if (mod=="ridge"){ # ridge with the AIC-optimal penalty
      X.train <- cbind(1, train$x1, train$x2)
      X.test <- cbind(1, test$x1, test$x2)
      XtX.train <- crossprod(X.train)
      p.train <- ncol(X.train)
      betahat.train <- solve(XtX.train + diag(ridge.lambda.opt, p.train)) %*%
        t(X.train) %*% train$y
      yhat.test <- X.test %*% betahat.train
    } else {                 # additive model with smoothing splines
      yhat.test <- additive.cv(train, test)
    }
    loss <- loss + sum((y.test - yhat.test)^2)
  }
  loss/n  # loss is already a scalar; the redundant sum() was dropped
}
### Perform cross validation on the models above (k = 10 folds and leave-one-out)
folds <- 10
galaxy.par <- partition.Kfold(folds, galaxy)
# n folds means one row per fold, i.e. leave-one-out (n is the global sample size)
galaxy.par.loo <- partition.Kfold(n, galaxy)
# Estimated MPSE for each model under 10-fold CV, rounded for the summary table.
CV.kfold.score <- c(cross.validate.project(galaxy.par,"lm"),
              cross.validate.project(galaxy.par,"ridge"),
              cross.validate.project(galaxy.par,"poly"),
              cross.validate.project(galaxy.par,"additive"))
CV.kfold.score <- sapply(CV.kfold.score, function(s){round(s, 3)})
# Same four models under leave-one-out CV.
CV.loo.score <- c(cross.validate.project(galaxy.par.loo,"lm"),
              cross.validate.project(galaxy.par.loo,"ridge"),
              cross.validate.project(galaxy.par.loo,"poly"),
              cross.validate.project(galaxy.par.loo,"additive"))
CV.loo.score <- sapply(CV.loo.score, function(s){round(s, 3)})

Model Performances

# Training mean squared errors for each model on the full galaxy data.
train.err.OLS <- mean((galaxy$y - predict(multilinear, galaxy))^2)
train.err.ridge <- mean((galaxy$y - (ridge.coef[1]+ridge.coef[2]*galaxy$x1+ridge.coef[3]*galaxy$x2))^2)
train.err.poly <- mean((galaxy$y - predict(poly, galaxy))^2)
# galaxy.spline.pred's $yhat is the in-sample fitted vector and does not
# depend on the newdata argument, so a dummy point (0, 0) is passed here.
train.err.add <- mean((galaxy$y - galaxy.spline.pred(lambda.opt, data.frame(x1=0,x2=0))$yhat)^2)
train.err <- round(rbind(train.err.OLS, train.err.ridge, train.err.poly, train.err.add),3)
AIC.score <- c(multilinear.aic(multilinear, galaxy),
               ridge.aic.opt,
               poly.aic(poly, galaxy),
               aic.opt)
AIC.score <- sapply(AIC.score, function(s){round(s, 3)})
rows <- c("OLS", "Ridge Penalty", 
          "Cubic Polynomial",
          "Additive")
# Assemble and render the comparison table (kable is from knitr).
summary <- cbind(rows, AIC.score,  train.err, CV.kfold.score, CV.loo.score)
columns <- c("Method", "AIC", "Training Error","Est. MPSE: K(=10)-Fold CV", "Est. MPSE: leave-one-out CV")
sum.table <- kable(summary, col.names = columns, booktab=T, align="l")
sum.table 
Method AIC Training Error Est. MPSE: K(=10)-Fold CV Est. MPSE: leave-one-out CV
train.err.OLS OLS 9.769 1004.587 1025.709 1027.596
train.err.ridge Ridge Penalty 9.769 1004.588 1025.721 1027.598
train.err.poly Cubic Polynomial 9.011 459.228 494.062 493.24
train.err.add Additive 8.647 305.941 339.485 344.503